{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "from selenium import webdriver\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver = webdriver.Chrome(r'G:\\chromedriver.exe')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "cities = [{\"name\": \"北京\", \"code\": 101010100, \"url\": \"/beijing/\"},\n",
    "{\"name\": \"上海\", \"code\": 101020100, \"url\": \"/shanghai/\"},\n",
    "{\"name\": \"广州\", \"code\": 101280100, \"url\": \"/guangzhou/\"},\n",
    "{\"name\": \"深圳\", \"code\": 101280600, \"url\": \"/shenzhen/\"},\n",
    "{\"name\": \"杭州\", \"code\": 101210100, \"url\": \"/hangzhou/\"},\n",
    "{\"name\": \"天津\", \"code\": 101030100, \"url\": \"/tianjin/\"},\n",
    "{\"name\": \"苏州\", \"code\": 101190400, \"url\": \"/suzhou/\"},\n",
    "{\"name\": \"武汉\", \"code\": 101200100, \"url\": \"/wuhan/\"},\n",
    "{\"name\": \"厦门\", \"code\": 101230200, \"url\": \"/xiamen/\"},\n",
    "{\"name\": \"长沙\", \"code\": 101250100, \"url\": \"/changsha/\"},\n",
    "{\"name\": \"成都\", \"code\": 101270100, \"url\": \"/chengdu/\"},\n",
    "{\"name\": \"郑州\", \"code\": 101180100, \"url\": \"/zhengzhou/\"},\n",
    "{\"name\": \"重庆\", \"code\": 101040100, \"url\": \"/chongqing/\"},\n",
    "{\"name\": \"青岛\", \"code\": 101120200, \"url\": \"/qingdao/\"},\n",
    "{\"name\": \"南京\", \"code\": 101190100, \"url\": \"/nanjing/\"}]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "for city in cities:\n",
    "    urls = ['https://www.zhipin.com/c{}/?query=数据分析&page={}&ka=page-{}'.format(city['code'], i, i) for i in range(1, 8)]\n",
    "    for url in urls:\n",
    "        driver.get(url)\n",
    "        html = driver.page_source\n",
    "        bs = BeautifulSoup(html, 'html.parser')\n",
    "        job_all = bs.find_all('div', {\"class\": \"job-primary\"})\n",
    "        for job in job_all:\n",
    "             position = job.find('span', {\"class\": \"job-name\"}).get_text()\n",
    "             address = job.find('span', {'class': \"job-area\"}).get_text()\n",
    "             company = job.find('div', {'class': 'company-text'}).find('h3', {'class': \"name\"}).get_text()\n",
    "             salary = job.find('span', {'class': 'red'}).get_text()\n",
    "             diploma = job.find('div', {'class': 'job-limit'}).find('p').get_text()[-2:]\n",
    "             experience = job.find('div', {'class': 'job-limit'}).find('p').get_text()[:-2]\n",
    "             labels = job.find('a', {'class': 'false-link'}).get_text()\n",
    "             with open('position.csv', 'a+', encoding='UTF-8-SIG') as f_obj:\n",
    "                f_obj.write(position.replace(',', '、') + \",\" + address + \",\" + company + \",\" + salary + \",\" + diploma + \",\" + experience + ',' + labels + \"\\n\")    \n",
    "driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
